% ---------------------------------------------------------------------------
% Preamble, part 1: engine/encoding setup, page geometry, captions, fonts,
% headers/footers, float tuning, section-title styling and hyperlink colours.
% One statement per line so that every `%` comment ends at its own line end
% and cannot swallow the code that follows it.
% ---------------------------------------------------------------------------
% NOTE(review): the class name was empty in the generated file; `article` is
% assumed here -- confirm against the template's `documentclass` setting.
\documentclass[11pt,a4paper]{article}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
  \usepackage[T1]{fontenc}
  \usepackage[utf8]{inputenc}
\else % if luatex or xelatex
  \usepackage{unicode-math}
  \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
\fi
% use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% use microtype if available
\IfFileExists{microtype.sty}{%
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\PassOptionsToPackage{hyphens}{url} % url is loaded by hyperref
\usepackage[unicode=true]{hyperref}
\hypersetup{
            pdfborder={0 0 0},
            breaklinks=true}
\urlstyle{same}  % don't use monospace font for urls
\usepackage{geometry}
\geometry{a4paper, centering, text={16cm,24cm}}
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
}
\setlength{\emergencystretch}{3em}  % prevent overfull lines
\providecommand{\tightlist}{%
  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\setcounter{secnumdepth}{0}
% set default figure placement to htbp
\makeatletter
\def\fps@figure{htbp}
\makeatother

\title{Report: Australian Census Data}
\providecommand{\subtitle}[1]{}
\subtitle{ETC5513 Assignment 4: Star Wars}

%% MONASH STUFF

%% CAPTIONS
\RequirePackage{caption}
\DeclareCaptionStyle{italic}[justification=centering]
 {labelfont={bf},textfont={it},labelsep=colon}
\captionsetup[figure]{style=italic,format=hang,singlelinecheck=true}
\captionsetup[table]{style=italic,format=hang,singlelinecheck=true}

%% FONT
\RequirePackage{bera}
\RequirePackage[charter,expert,sfscaled]{mathdesign}
\RequirePackage{fontawesome}

%% HEADERS AND FOOTERS
\RequirePackage{fancyhdr}
\pagestyle{fancy}
\rfoot{\Large\sffamily\raisebox{-0.1cm}{\textbf{\thepage}}}
% \@title is an internal macro, hence the \makeatletter guard.
\makeatletter
\lhead{\textsf{\expandafter{\@title}}}
\makeatother
\rhead{}
\cfoot{}
\setlength{\headheight}{15pt}
\renewcommand{\headrulewidth}{0.4pt}
\renewcommand{\footrulewidth}{0.4pt}
\fancypagestyle{plain}{%
\fancyhf{} % clear all header and footer fields
\fancyfoot[C]{\sffamily\thepage} % except the center
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}}

%% MATHS
\RequirePackage{bm,amsmath}
\allowdisplaybreaks

%% GRAPHICS
\RequirePackage{graphicx}
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{2}
\setcounter{totalnumber}{4}
\renewcommand{\topfraction}{0.85}
\renewcommand{\bottomfraction}{0.85}
\renewcommand{\textfraction}{0.15}
\renewcommand{\floatpagefraction}{0.8}
%\RequirePackage[section]{placeins}

%% SECTION TITLES (NEW: Changing sections and subsections color)
% \sffamily replaces the obsolete two-letter font switch \sf.
\RequirePackage[compact,sf,bf]{titlesec}
\titleformat*{\section}{\Large\sffamily\bfseries\color[rgb]{0.8, 0.7, 0.1 }}
\titleformat*{\subsection}{\large\sffamily\bfseries\color[rgb]{0.8, 0.7, 0.1 }}
\titleformat*{\subsubsection}{\sffamily\bfseries\color[rgb]{0.8, 0.7, 0.1 }}
\titlespacing{\section}{0pt}{2ex}{.5ex}
\titlespacing{\subsection}{0pt}{1.5ex}{0ex}
\titlespacing{\subsubsection}{0pt}{.5ex}{0ex}

%% TITLE PAGE
% Day, month name and year of the compilation date, used on the front page.
\def\Date{\number\day}
\def\Month{\ifcase\month\or
 January\or February\or March\or April\or May\or June\or
 July\or August\or September\or October\or November\or December\fi}
\def\Year{\number\year}

%% LINE AND PAGE BREAKING
\sloppy
\clubpenalty = 10000
\widowpenalty = 10000
\brokenpenalty = 10000
\RequirePackage{microtype}

%% PARAGRAPH BREAKS
\setlength{\parskip}{1.4ex}
\setlength{\parindent}{0em}

%% HYPERLINKS
\RequirePackage{xcolor} % Needed for links
\definecolor{darkblue}{rgb}{0,0,.6}
\RequirePackage{url}

\makeatletter
\@ifpackageloaded{hyperref}{}{\RequirePackage{hyperref}}
\makeatother
% citecolor was previously also given as the raw triple "0 0 0" at the top of
% this list; that entry was overridden by citecolor=darkblue below and has
% been dropped.
\hypersetup{
     breaklinks=true,
     bookmarksopen=true,
     bookmarksnumbered=true,
     linkcolor=darkblue,
     urlcolor=blue,
     citecolor=darkblue,
     colorlinks=true}
% ---------------------------------------------------------------------------
% Preamble, part 2: equation numbering, date format and biblatex styling.
% One statement per line so the `%%` section comment no longer comments out
% the bibliography configuration that follows it.
% ---------------------------------------------------------------------------
\usepackage[showonlyrefs]{mathtools}
\usepackage[no-weekday]{eukdate}

%% BIBLIOGRAPHY

% Load biblatex only if the document has not already loaded it.
\makeatletter
\@ifpackageloaded{biblatex}{}{\usepackage[style=authoryear-comp, backend=biber, natbib=true]{biblatex}}
\makeatother
\ExecuteBibliographyOptions{bibencoding=utf8,minnames=1,maxnames=3, maxbibnames=99,dashed=false,terseinits=true,giveninits=true,uniquename=false,uniquelist=false,doi=false, isbn=false,url=true,sortcites=false}

% Per-field and per-entry-type formatting overrides.
\DeclareFieldFormat{url}{\texttt{\url{#1}}}
\DeclareFieldFormat[article]{pages}{#1}
\DeclareFieldFormat[inproceedings]{pages}{\lowercase{pp.}#1}
\DeclareFieldFormat[incollection]{pages}{\lowercase{pp.}#1}
\DeclareFieldFormat[article]{volume}{\mkbibbold{#1}}
\DeclareFieldFormat[article]{number}{\mkbibparens{#1}}
\DeclareFieldFormat[article]{title}{\MakeCapital{#1}}
\DeclareFieldFormat[article]{url}{}
%\DeclareFieldFormat[book]{url}{}
%\DeclareFieldFormat[inbook]{url}{}
%\DeclareFieldFormat[incollection]{url}{}
%\DeclareFieldFormat[inproceedings]{url}{}
\DeclareFieldFormat[inproceedings]{title}{#1}
\DeclareFieldFormat{shorthandwidth}{#1}
%\DeclareFieldFormat{extrayear}{}
% No dot before number of articles
\usepackage{xpatch}
\xpatchbibmacro{volume+number+eid}{\setunit*{\adddot}}{}{}{}
% Remove In: for an article.
% ---------------------------------------------------------------------------
% Preamble, part 3: remaining biblatex tweaks, author/date metadata, the
% front-page macro \front, user packages, and the start of the document.
% One statement per line; Git merge-conflict markers removed (content from
% both sides of each conflict is kept).
% ---------------------------------------------------------------------------
% Suppress the "In:" prefix for @article entries only.
\renewbibmacro{in:}{%
  \ifentrytype{article}{}{%
  \printtext{\bibstring{in}\intitlepunct}}}
\AtEveryBibitem{\clearfield{month}}
\AtEveryCitekey{\clearfield{month}}
\makeatletter
\DeclareDelimFormat[cbx@textcite]{nameyeardelim}{\addspace}
\makeatother

% \sffamily replaces the obsolete two-letter font switch \sf throughout.
\author{\sffamily\Large\textbf{ Mohammed Faizan}\\
  {\sffamily\large MBAt\\[0.5cm]}
  \sffamily\Large\textbf{ Adarsh More}\\
  {\sffamily\large MBAt\\[0.5cm]}
  \sffamily\Large\textbf{ Yanhui LI}\\
  {\sffamily\large MBAt\\[0.5cm]}}

\date{\sffamily\Date~\Month~\Year}
\makeatletter
\lfoot{\sffamily Faizan, More, LI: \@date}
\makeatother

%%%% PAGE STYLE FOR FRONT PAGE OF REPORTS

% Setter macros that store their argument in an internal \@... macro.
\makeatletter
\def\organization#1{\gdef\@organization{#1}}
\def\telephone#1{\gdef\@telephone{#1}}
\def\email#1{\gdef\@email{#1}}
\makeatother
\organization{Monash University}

% \name is typeset inside a \parbox (not a tabular), so a bare `&` there is a
% "Misplaced alignment tab character" error; the ampersand is now escaped and
% the missing separator after "Star Wars" restored.
% NOTE(review): exact wording/punctuation of the name line is assumed -- confirm.
\def\name{Our consultancy - Star Wars: Mohammed Faizan, Adarsh More \& Yanhui LI}

\telephone{(03) 9905 2478}
\email{questions@company.com} %NEW: New email address
\def\webaddress{\url{http://company.com/stats/consulting/}} %NEW: URL
\def\abn{12 377 614 630} % NEW: ABN
\def\logo{\includegraphics[width=6cm]{Figures/logo}} %NEW: Changing logo
\def\extraspace{\vspace*{1.6cm}}
% Two tabular rows (icon & value) used inside the front-page contact table.
\makeatletter
\def\contactdetails{\faicon{phone} & \@telephone \\
                    \faicon{envelope} & \@email}
\makeatother

%%%% FRONT PAGE OF REPORTS

\def\reporttype{Report for}

% \front{<title>}{<authors>}{<organization>}
% Typesets the front page: a left column with logo, consultancy name, contact
% details and ABN; a vertical rule; and a right column with title, authors,
% report type, organization and date. Line breaks inside the boxes fall only
% where the source had spaces, because spaces are significant inside \hbox.
% The trailing comment "title color blue" is kept on one line; when it wrapped
% onto the next line the words "color blue" were typeset as text.
\long\def\front#1#2#3{
\newpage
\begin{singlespacing}
\thispagestyle{empty}
\vspace*{-1.4cm}
\hspace*{-1.4cm}
\hbox to 16cm{
  \hbox to 6.5cm{\vbox to 14cm{\vbox to 25cm{
    \logo
    \vfill
    \parbox{6.3cm}{\raggedright
      \sffamily\color[rgb]{0.8, 0.7, 0.1 } % NEW color
      {\large\textbf{\name}}\par
      \vspace{.7cm}
      \tabcolsep=0.12cm\sffamily\small
      \begin{tabular}{@{}ll@{}}\contactdetails
      \end{tabular}
      \vspace*{0.3cm}\par
      ABN: \abn\par
      }
    }\vss}\hss}
  \hspace*{0.2cm}
  \hbox to 1cm{\vbox to 14cm{\rule{4pt}{26.8cm}\vss}\hss\hfill} %NEW: Thicker line
  \hbox to 10cm{\vbox to 14cm{\vbox to 25cm{
      \vspace*{3cm}\sffamily\raggedright
      \parbox{11cm}{\sffamily\raggedright\baselineskip=1.2cm
         \fontsize{24.88}{30}\color[rgb]{0, 0.29, 0.55}\sffamily\textbf{#1}} % NEW: title color blue
      \par
      \vfill
      \large
      \vbox{\parskip=0.8cm #2}\par
      \vspace*{2cm}\par
      \reporttype\\[0.3cm]
      \hbox{#3}%\\[2cm]\
      \vspace*{1cm}
      {\large\sffamily\textbf{\Date~\Month~\Year}}
   }\vss}
  }}
\end{singlespacing}
\newpage
}

% \titlepage builds the front page from \title, \author and \organization.
\makeatletter
\def\titlepage{\front{\expandafter{\@title}}{\@author}{\@organization}}
\makeatother

\usepackage{setspace}
\setstretch{1.5}

%% Any special functions or other packages can be loaded here.
\AtBeginDocument{\addtocontents{toc}{\protect\thispagestyle{empty}}}
\usepackage{capt-of}
\usepackage{graphicx}
\usepackage{url}
\usepackage{float}

\begin{document}
\titlepage

{
% NOTE(review): the depth value was empty in the generated file ("Missing
% number" error); 2 is assumed -- confirm the intended toc depth.
\setcounter{tocdepth}{2}
\tableofcontents
}
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
Education level count by gender

(#fig:edu_gender)Education level count by gender

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Population distribution of education level

Figure 1: Population distribution of education level

Best education level of each region

Figure 2: Best education level of each region

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Population distribution of field

Figure 3: Population distribution of field

Best field of each region

Figure 4: Best field of each region

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.

The Australian Bureau of Statistics (ABS) conducts the census for Australia every 5 years, which includes all people present in Australia on the census night irrespective of their nationality. Wikipedia contributors (2021) defines census as “systematically calculating, acquiring and recording information about the members of a given population. This term is used mostly in connection with national population and housing censuses.” A census aims to include the entire population as opposed to sampling, and therefore data is recorded for every individual. However, when this data is released to the public for interested institutions such as businesses, other government organizations, NGOs and other researchers, it is only ethical to de-identify the data. Ethics has always been argued for risk versus benefit. With census data capturing personal details it must be de-identified, and therefore ABS makes it available after perturbation as aggregated data (Confidentiality).

Being population data, the census is able to capture insights about small geographic boundaries and demographics precisely. The 2016 Census data was output using the 2016 Australian Statistical Geography Standard (ASGS). The ABS Structures are a hierarchy of areas developed for the release of ABS statistical information. This statistical information represents data for all census geographies from Australia down to Statistical Area Level 1. @CensusDefinition say “Data can be represented visually or analyzed in complex statistical models, to show the difference between certain areas, or to understand the association between different personal characteristics.”

Our report is based on 2016 census data from the Australian Bureau of Statistics (ABS). In 2016, the Census collected data for 10 million dwellings and approximately 24 million people, the largest number counted to date. The report dwells on the SA4 regions of Victoria and the topics for analysis are the Field of Study, Education Qualifications, Industry of Employment and Occupation. We try to determine the association between these topics based on age and gender. To further support these association insights, data from the Victorian Public Sector Commission (VPSC) is included.

The census data was not in accordance with the tidy data definition and was spread across multiple files. Datapacks are provided in CSV format. Geopacks include comprehensive data files and associated Geographic Information System (GIS) boundary files in a format suitable for loading into proprietary software and/or client custom-built systems. Hence appropriate cleaning was performed and cell values were renamed in a more human-readable form.

Map 1 is a population density map which shows the population concentration in each of the SA4 regions. The ABS divides the geographical areas on the basis of the population density such that each region has comparable densities irrespective of the area/size of the region. There are nineteen SA4 regions in Victoria, each represented on the map with a number starting with the state code of Victoria: 2. Ten regions out of nineteen exist in Melbourne city, suggesting that most people in Victoria reside in Melbourne and the countryside of Victoria is sparsely populated. Precise numbers are present in Table 1. Regions in Melbourne have higher population, with the exception of regions 297 and 299 having a population of 9 and 1994 respectively. Male and female population is comparable in all regions, however male population was higher in all regions.

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
## Joining, by = "SA4_CODE_2016"

Map: Victoria Population

Figure 1: Map: Victoria Population

Table 1: Victorian Population
SA4_CODE_2016 femalepopulation malepopulation population
201 32726 34691 67417
202 32396 34054 66450
203 60660 64307 124967
204 35934 39614 75548
205 52929 57572 110501
206 159362 160819 320181
207 81814 86786 168600
208 96482 101671 198153
209 109370 122195 231565
210 71224 85167 156391
211 118179 129501 247680
212 151481 184164 335645
213 147830 178340 326170
214 62731 68190 130921
215 29867 33492 63359
216 25915 28796 54711
217 26236 29297 55533
297 0 9 9
299 765 1229 1994

Age distribution for Victoria is represented by the density plot, Figure 2.

Age Distribution

Figure 2: Age Distribution

To study the association between the topics of study, the figure is a bar chart that shows the population in the sub-divisions of each topic arranged in decreasing order representing both male and female population. The following inferences were made by comparing the individual plots that each represent a topic of study.

The Figure shows the total populations for sub-divisions within each topic.

A linear model to determine the male to female population ratio is fit for the male and female population with respect to occupations and industries. A line(slope=1, black line) decides the value of this ratio. The models above this line have a higher female population and the models below this line have a higher male population. These models are shown in Figure and Figure .

In Victoria, for any particular education level, more women have achieved it than men. More women are educated(ratio F:M):

Whereas, for people having a qualification of certificate level 3 and 4, men far exceed women.

Male and female population is comparable with respect to professionals(a major occupation), however females have a higher ratio.

More women are employed as(ratio F:M):

Whereas, more men are employed as(ratio M:F):

Male and female population is comparable with respect to professionals(a major occupation), however females have a higher ratio. Probable reason is the low education levels of men as seen above.

More women studied in many fields of which most important are(ratio F:M):

Whereas, more men studied(ratio M:F):

More women are employed in(ratio F:M): Probable reason is their field of study.

Whereas, more men are employed in other industries of which most significant are(ratio M:F): Probable reason is their field of study.

Male and female population is comparable in Administrative and support services, Financial and insurance services, Accommodation and food services and Professional scientific and technical services, however males have a higher ratio.

0.0.12 Population by Education

## `summarise()` has grouped output by 'educationlevel'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

0.0.13 Population by Industries

## `summarise()` has grouped output by 'industry'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

0.0.14 Population by Field

## `summarise()` has grouped output by 'field'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

Spatial Industry Distribution

Figure 5: Spatial Industry Distribution

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.

0.0.15 Population by Occupation

## `summarise()` has grouped output by 'occupation'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

A network plot is a representation of the relationship between vertices, and the strength of this relationship is determined by the edge weight (width, opacity, length, etc). In order to understand the relation between the topics and the age, the Figure is a network plot representing the relationships between the sub-divisions within each topic and the age of the people. The edge weight is determined by the population size that belongs to that connection. The network graphs are based on the population, and distribution can be compared only within each age group since different age groups have different populations.

First inferences from these networks are:

The following tables present the age group with the highest population for every sector, and the age group [25-35) is found to be dominating every sector owing to the fact that this age group has the highest population in Victoria.

## `summarise()` has grouped output by 'afq_level'. You can override using the `.groups` argument.
Table 2: Education: Population
afq_level age_min population
Level 1 & 2 15 9402
Level 3 & 4 25 146297
Level 5 & 6 25 96920
Level 7 25 245613
Level 9 25 83204
Not Stated 25 70455
Level 8 35 28908
## `summarise()` has grouped output by 'industry'. You can override using the `.groups` argument.
Table 3: Industry: Population
industry age_min population
Accommodation_and_food_services 25 42103
Administrative_and_support_services 25 23086
Arts_and_recreation_services 25 13149
Construction 25 61959
Electricity_gas_water_and_waste_service 25 8039
Financial_and_insurance_services 25 32021
Health_care_and_social_assistance 25 80994
Information_media_and_telecommunications 25 14702
Not Stated 25 29901
Other_services 25 24089
Professional_scientific_and_technical_services 25 64125
Rental_hiring_and_real_estate_services 25 11796
Retail_trade 25 61803
Mining 35 2441
Wholesale_trade 35 22199
Education_and_training 45 56125
Manufacturing 45 55206
Public_administration_and_safety 45 37747
Transport_postal_and_warehousing 45 32663
Agriculture_forestry_and_fishing 55 12733
## `summarise()` has grouped output by 'field'. You can override using the `.groups` argument.
Table 4: Field: Population
field age_min population
Mixed_Field_Programmes 15 1813
Architecture_and_Building 25 42510
Creative_Arts 25 40334
Food_Hospitality_and_Personal_Services 25 42938
Health 25 67630
Information_Technology 25 37535
Management_and_Commerce 25 150571
Natural_and_Physical_Sciences 25 22171
Not Stated 25 71440
Society_and_Culture 25 80932
Agriculture_Environment 35 13016
Engineering_and_Technologies 45 77524
Education 55 44696
NA NA 896
## `summarise()` has grouped output by 'occupation'. You can override using the `.groups` argument.
Table 5: Occupation: Population
occupation age_min population
Community_and_personal_service_workers 25 67104
Not Stated 25 11075
Professionals 25 190449
Sales_workers 25 51772
Technicians_and_trades_workers 25 99110
Managers 35 100601
Clerical_and_administrative_workers 45 89021
Labourers 45 49653
Machinery_operators_and_drivers 45 40922

The bar plots represent the SA4 regions and its working population with respect to their education levels, field of study, industry of employment and occupations. Each plot shows the region which had the highest population belonging to that subdivision.

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Education Level: Region

Figure 3: Education Level: Region

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Industry: Region

Figure 4: Industry: Region

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Field: Region

Figure 5: Field: Region

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Occupation: Region

Figure 6: Occupation: Region

The maps represent the SA4 regions and the distribution of population by their education levels, industries, field of study and occupations respectively.

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Spatial Education Level Distribution

Figure 7: Spatial Education Level Distribution

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Spatial Industry Distribution

Figure 8: Spatial Industry Distribution

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Spatial Study Field Distribution

Figure 9: Spatial Study Field Distribution

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
Spatial Occupation Distribution

Figure 10: Spatial Occupation Distribution

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
## Joining, by = c("node", "category")

This section of the report represents the population of the SA4 regions aged 15 years and above. The population is represented by their Occupations and the Industries they are employed in and also by Sex and weekly working hours. The purpose is to determine whether males or females have worked for more hours, which industries have highest working population and region-wise which occupations had most number of employees.

For the analysis, the tables G52 and G58 were cleaned and relevant variables were renamed. For the cleaning process we excluded the total and population variables, converted the data frame into longer format and joined it with the geopath for SA4 geomap. We then summarized the minimum and maximum hours of the population working for industries as well as employees in occupations. The summarized data was plotted to compare the minimum and maximum working hours by gender.

We used the summarized data to plot the minimum hours worked by people with respect to each industry in a count plot. Furthermore, we summarized the number of people and grouped them by the SA4 codes for each region and arranged the data sets in descending order to plot region-wise bar plots representing the regions, education level, field of study, industries and occupations.

Lastly, we plotted the distribution of population by their education levels, industries, field of study and occupations on 4 maps respectively. The analysis involved mapping the geometric values of the regions on the SA4 geomap with respect to total population and the respective industries and occupations.

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## # A tibble: 1 x 1
##   hr_min
##    <dbl>
## 1 157320
## # A tibble: 1 x 1
##   hr_max
##    <dbl>
## 1 121600

It can be observed from fig @ref(fig:hr_plots) that overall females worked more than men. However, as the number of work-hours increased men have worked more than women.

It can be observed from fig @ref(fig:ind_hrs) that industries like health care, education and training, construction and Professional and technical services have more working population as the working hours increased. Mining, electricity, gas, water and agriculture forestry and fishing showed low working population irrespective of work hours.

## # A tibble: 1 x 1
##   hrs_min
##     <dbl>
## 1   70794
## # A tibble: 1 x 1
##   hrs_max
##     <dbl>
## 1   54720

It can be observed from fig @ref(fig:hrs_plots) that overall females worked more than men at all occupations. Although, for maximum hours worked, as number of working-hours increased, the number of men and women remained the same.

## `summarise()` has grouped output by 'SA4_CODE_2016'. You can override using the `.groups` argument.

It can be observed from fig ?? that the most number of employees in the SA4 regions are employed in the occupations of Professionals, Managers and Technicians and trade workers. Professionals accounted for the highest number of employees for region 206, while machinery operators and drivers accounted for the least number of employees for region 213.

Conclusion

The education levels, field of study, industry of employment and occupation were studied for the Victorian SA4 level populations for the distributions according to age and gender. The tables and plots were compared to mark the covariations between the population distributions, for example, the population trend between the field of study and industry of employment. Networks were drawn based on the population weights to analyze these trends. Some of the trends, such as more men being employed as managers when more women had studied management, were found to be interesting. Choropleth maps were made to analyze these trends spatially.

The goal of this report is to create a data story from these statistical summaries to enumerate the facts from the data and link them to the real world. The data provided by the Australian Bureau of Statistics is an aggregated open data and in no form identifies individuals who participated in the census. The ABS aims to integrate the census data with other datasets to make this census data more interesting. Thus, we aim to do the same and bring some interesting data stories as we progress building this report.

R Core Team (2021)

Xie (2021a) Dietrich (2020) Wickham et al. (2021),

Wickham (2021a),

Wickham et al. (2020),

Zhu (2021),

Xie (2021b),

Tierney et al. (2020),

Pedersen (2020),

Henry and Wickham (2020),

Wickham and Hester (2020),

Wickham and Seidel (2020),

Wickham (2019),

Müller and Wickham (2021),

Wickham (2021b),

Wickham (2021c),

Xie (2021c),

Tierney (2019),

Xie (2016),

Wickham (2016),

Xie (2015),

Xie (2014),

Wickham et al. (2019),

Xie (2019),

Tierney (2017)

Dietrich, Jan Philipp. 2020. Citation: Software Citation Tools. https://CRAN.R-project.org/package=citation.
Henry, Lionel, and Hadley Wickham. 2020. Purrr: Functional Programming Tools. https://CRAN.R-project.org/package=purrr.
Müller, Kirill, and Hadley Wickham. 2021. Tibble: Simple Data Frames. https://CRAN.R-project.org/package=tibble.
Pedersen, Thomas Lin. 2020. Patchwork: The Composer of Plots. https://CRAN.R-project.org/package=patchwork.
R Core Team. 2021. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.
Tierney, Nicholas. 2017. “Visdat: Visualising Whole Data Frames.” JOSS 2 (16): 355. https://doi.org/10.21105/joss.00355.
———. 2019. Visdat: Preliminary Visualisation of Data. https://CRAN.R-project.org/package=visdat.
Tierney, Nicholas, Di Cook, Miles McBain, and Colin Fay. 2020. Naniar: Data Structures, Summaries, and Visualisations for Missing Data. https://github.com/njtierney/naniar.
Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. https://ggplot2.tidyverse.org.
———. 2019. Stringr: Simple, Consistent Wrappers for Common String Operations. https://CRAN.R-project.org/package=stringr.
———. 2021a. Forcats: Tools for Working with Categorical Variables (Factors). https://CRAN.R-project.org/package=forcats.
———. 2021b. Tidyr: Tidy Messy Data. https://CRAN.R-project.org/package=tidyr.
———. 2021c. Tidyverse: Easily Install and Load the Tidyverse. https://CRAN.R-project.org/package=tidyverse.
Wickham, Hadley, Mara Averick, Jennifer Bryan, Winston Chang, Lucy D’Agostino McGowan, Romain François, Garrett Grolemund, et al. 2019. “Welcome to the tidyverse.” Journal of Open Source Software 4 (43): 1686. https://doi.org/10.21105/joss.01686.
Wickham, Hadley, Winston Chang, Lionel Henry, Thomas Lin Pedersen, Kohske Takahashi, Claus Wilke, Kara Woo, Hiroaki Yutani, and Dewey Dunnington. 2020. Ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics. https://CRAN.R-project.org/package=ggplot2.
Wickham, Hadley, Romain François, Lionel Henry, and Kirill Müller. 2021. Dplyr: A Grammar of Data Manipulation. https://CRAN.R-project.org/package=dplyr.
Wickham, Hadley, and Jim Hester. 2020. Readr: Read Rectangular Text Data. https://CRAN.R-project.org/package=readr.
Wickham, Hadley, and Dana Seidel. 2020. Scales: Scale Functions for Visualization. https://CRAN.R-project.org/package=scales.
Wikipedia contributors. 2021. “Census — Wikipedia, the Free Encyclopedia.” https://en.wikipedia.org/w/index.php?title=Census&oldid=1023830734.
Xie, Yihui. 2014. “Knitr: A Comprehensive Tool for Reproducible Research in R.” In Implementing Reproducible Computational Research, edited by Victoria Stodden, Friedrich Leisch, and Roger D. Peng. Chapman; Hall/CRC. http://www.crcpress.com/product/isbn/9781466561595.
———. 2015. Dynamic Documents with R and Knitr. 2nd ed. Boca Raton, Florida: Chapman; Hall/CRC. https://yihui.org/knitr/.
———. 2016. Bookdown: Authoring Books and Technical Documents with R Markdown. Boca Raton, Florida: Chapman; Hall/CRC. https://bookdown.org/yihui/bookdown.
———. 2019. “TinyTeX: A Lightweight, Cross-Platform, and Easy-to-Maintain LaTeX Distribution Based on TeX Live.” TUGboat, no. 1: 30–32. http://tug.org/TUGboat/Contents/contents40-1.html.
———. 2021a. Bookdown: Authoring Books and Technical Documents with r Markdown. https://CRAN.R-project.org/package=bookdown.
———. 2021b. Knitr: A General-Purpose Package for Dynamic Report Generation in r. https://yihui.org/knitr/.
———. 2021c. Tinytex: Helper Functions to Install and Maintain TeX Live, and Compile LaTeX Documents. https://github.com/yihui/tinytex.
Zhu, Hao. 2021. kableExtra: Construct Complex Table with Kable and Pipe Syntax. https://CRAN.R-project.org/package=kableExtra.
\end{document}